#Set up

#s1
rm(list=ls())

setwd ("C:/Users/cecidy/Google Drive/ZH_analysis/")

source("ZHfunctions.R")
options(scipen=9999)

ZeroHunger <- read.csv("Data/ZHdf/ZeroHungerDF_2020.csv",header=TRUE, sep=",")

#s2
################################################################################################################

#start1_

#PRONAF and Child malnutrition core sample

#######################################################################################################################

#Make a nonBF ZHvariable!

summary(ZeroHunger$PRONAFPerCapTotal04to13for2004Analysis1000)

ZeroHunger$ZHPerCapTotal04to13for2004Analysis1000NOPRONAF <- with(ZeroHunger,ZHPerCapTotal04to13for2004Analysis1000-PRONAFPerCapTotal04to13for2004Analysis1000)

#######################################################################################################################

#Add 100 to the malnutrition variables

ZeroHunger$BirthWeighedFull2004for2004Analysis100 <- ifelse(is.na(ZeroHunger$BirthWeighedFull2004for2004Analysis),NA,ZeroHunger$BirthWeighedFull2004for2004Analysis*100)
ZeroHunger$BirthWeighedFull2013for2004Analysis100 <- ifelse(is.na(ZeroHunger$BirthWeighedFull2013for2004Analysis),NA,ZeroHunger$BirthWeighedFull2013for2004Analysis*100)

ZeroHunger$OneWeighedAllFour2004for2004Analysis100 <- ifelse(is.na(ZeroHunger$OneWeighedAllFour2004for2004Analysis),NA,ZeroHunger$OneWeighedAllFour2004for2004Analysis*100)
ZeroHunger$OneWeighedAllFour2013for2004Analysis100 <- ifelse(is.na(ZeroHunger$OneWeighedAllFour2013for2004Analysis),NA,ZeroHunger$OneWeighedAllFour2013for2004Analysis*100)

ZeroHunger$BirthUnderweightFullPerc2004for2004Analysis100 <- ifelse(is.na(ZeroHunger$BirthUnderweightFullPerc2004for2004Analysis),NA,ZeroHunger$BirthUnderweightFullPerc2004for2004Analysis*100)
ZeroHunger$BirthUnderweightFullPerc2013for2004Analysis100 <- ifelse(is.na(ZeroHunger$BirthUnderweightFullPerc2013for2004Analysis),NA,ZeroHunger$BirthUnderweightFullPerc2013for2004Analysis*100)

ZeroHunger$OneUnderweightAllFourPerc2004for2004Analysis100 <- ifelse(is.na(ZeroHunger$OneUnderweightAllFourPerc2004for2004Analysis),NA,ZeroHunger$OneUnderweightAllFourPerc2004for2004Analysis*100)
ZeroHunger$OneUnderweightAllFourPerc2013for2004Analysis100 <- ifelse(is.na(ZeroHunger$OneUnderweightAllFourPerc2013for2004Analysis),NA,ZeroHunger$OneUnderweightAllFourPerc2013for2004Analysis*100)

#####################################################################################################################

#1. Prepare each dataframe

#Get Rural municipalities
Poverty04 <- subset(ZeroHunger, PopDensity20042004Analysis<=150)

#Get list of vars
load(file = "Data/ForSamples/MPI04ListOnly04.rda")
load(file = "Data/ForSamples/Basic04.rda")

RuralPoverty04 <- subset(Poverty04, select=c("PRONAFPerCapTotal04to13for2004Analysis1000","ZHPerCapTotal04to13for2004Analysis1000NOPRONAF",Basic04,
                                             MPI04ListOnly04,
                                             "BirthUnderweightFullPerc2004for2004Analysis100",
                                             "BirthUnderweightFullPerc2013for2004Analysis100",
                                             "OneUnderweightAllFourPerc2004for2004Analysis100",
                                             "OneUnderweightAllFourPerc2013for2004Analysis100"))
names(RuralPoverty04)

RuralPoverty04 <- na.omit(RuralPoverty04)

#Then take away the ones affected by rural credit
load(file = "Data/ForSamples/EcludeForRCProb04.rda")

RuralPoverty04 <- subset(RuralPoverty04,(!(IBGECode7digit %in% EcludeForRCProb04)))

#Then do the quality
RuralPoverty04quality <- RuralPoverty04

#############################################################################################

#get in MPI13, do the MPI function. 
load(file = "Data/ForSamples/MPI04ListOnly13.rda")

TempVars <- subset(Poverty04,select=c("IBGECode7digit",MPI04ListOnly13))
RuralPoverty04quality <- merge(x=RuralPoverty04quality,y=TempVars, by="IBGECode7digit",all.x=T)

#############################################################################################

#Make logged variables

#First make MPI with the correct 0s
test <- NrOfZeroALL(RuralPoverty04quality)#This gives me all the vars with 0s
MPIadd <- test[[1]]#looks like all need 0, can this be?
Otheradd <- test[[2]]

#For the MPI because Ive not previously renamed when adding I just override the vars with 0 with the minimum/2
WithAdded <- lapply(MPIadd,function(y) AddConstant(RuralPoverty04quality,y,addName=""))
WithAdded <- data.frame(do.call("cbind",WithAdded))

RuralPoverty04quality[,MPIadd] <- WithAdded[,MPIadd]#just override

#make the MPI - OBS 04 FUNCTION
RuralPoverty04quality <- MakeMPI04(RuralPoverty04quality)

#######################################################################################################################

#Then for the others I cbind them on, and because Ive not included any of previous noCero there no duplicates
RuralPoverty04quality <- cbind(RuralPoverty04quality,lapply(Otheradd,function(y) AddConstant(RuralPoverty04quality,y,addName="nocero")))#

###########################################################################################################################

#And finally I need to log variables - get a list of all names

OtheraddCero <- paste(Otheradd,"nocero",sep="")#here which are names no cero

#All these need log
cols.log <- c("ZHPerCapTotal04to13for2004Analysis1000NOPRONAF","MPI2004for2004Analysis",
              "GDPPerCapPublicrealN2004for2004Analysis1000",OtheraddCero,"RemoteMinPopFifty2004Analysis",
              "PopDensity20042004Analysis","SizeKm22004Analysis","MeanSlopefor2004analysis","MeanElevationfor2004analysis")
addlog <- function(x) log10(x)
Log10variables <- data.frame(sapply(RuralPoverty04quality[cols.log],addlog))
colnames(Log10variables) <- paste(colnames(Log10variables), "log10",sep="")

#Get all onto ZH
RuralPoverty04quality <- cbind(RuralPoverty04quality,Log10variables)
head(RuralPoverty04quality)

#but for PRONAF I dont want to have the nocero there
colnames(RuralPoverty04quality)[colnames(RuralPoverty04quality)=="PRONAFPerCapTotal04to13for2004Analysis1000nocerolog10"] <- "PRONAFPerCapTotal04to13for2004Analysis1000log10"

###########################################################################################################################

#Need to take away all the MPI13 again, if not then later it messes up the size
AllMPI13 <- c(MPI04ListOnly13,"MPIHealthChildMalnutrition2013for2004Analysis","MPIHealth2013for2004Analysis","MPI2013for2004Analysis",
              "MPI2013for2004Analysis","MPILivingStandard2013for2004Analysis")
RuralPoverty04quality <- RuralPoverty04quality[, !names(RuralPoverty04quality) %in% AllMPI13]

############################

#Create child malnutrition variable

gmean <- function(x){
  # geometric mean calculation
  prodx <- prod(x)
  n   <- length(x)
  gm <- prodx**(1/n)
  gm
}

RuralPoverty04quality$ChildUnderweightPerc2004for2004Analysis100 <- apply(RuralPoverty04quality[,c("BirthUnderweightFullPerc2004for2004Analysis100nocero",
                                                                                                   "OneUnderweightAllFourPerc2004for2004Analysis100nocero")], 1, gmean)
RuralPoverty04quality$ChildUnderweightPerc2013for2004Analysis100 <- apply(RuralPoverty04quality[,c("BirthUnderweightFullPerc2013for2004Analysis100nocero",
                                                                                                   "OneUnderweightAllFourPerc2013for2004Analysis100nocero")], 1, gmean)


RuralPoverty04quality[,c("ChildUnderweightPerc2004for2004Analysis100",
                         "ChildUnderweightPerc2013for2004Analysis100")] <- round(RuralPoverty04quality[,c("ChildUnderweightPerc2004for2004Analysis100",
                                                                                                          "ChildUnderweightPerc2013for2004Analysis100")],digits=0)

#########################################################################

#I make it on log here but that I might have to go back and change after checking for log or not!
RuralPoverty04quality<- getDataFrame(RuralPoverty04quality,"contr.Sum",
                                     contrastVar1="stateName",contrastVar2="Biomefor2004Analysis",setRefToMostCommonLevel,
                                     BaseVar="PRONAFPerCapTotal04to13for2004Analysis1000log10",
                                     MainName="PRONAFPerCapTotal04to13for2004Analysis1000log10scaledMain")

#fin1_
###############################################################################################################

#start2_

#Prepare vectors of variables for regressions

###############################################################################################################

Depvar <- "ChildUnderweightPerc2013for2004Analysis100"
DepVarLog <- "ChildUnderweightPerc2013for2004Analysis100"

IndepVarLin <- "PRONAFPerCapTotal04to13for2004Analysis1000log10scaledMain"#already scaled so dont need to do it again

baselineDepVar <- "scale(ChildUnderweightPerc2004for2004Analysis100)"

stateName <- "stateName"

intLin <- c("PRONAFPerCapTotal04to13for2004Analysis1000log10scaledMain","stateName")
intLin <- paste(intLin,collapse="*")

xvars <- c("scale(MPI2004for2004Analysislog10)",
           "scale(GDPPerCapPublicrealN2004for2004Analysis1000log10)",
           "scale(TotalKcal2004percapperdaynocerolog10)",
           "scale(TotalHectareCrops2004for2004Analysisnocerolog10)","scale(TotalHectarePasture2006for2004Analysisnocerolog10)",
           "scale(TotalHectareFarmsLess502006for2004Analysisnocerolog10)","scale(RemoteMinPopFifty2004Analysislog10)",
           "scale(ChangeCummulativeSPEI02and04to11and13for2004Analysis)","scale(TotRCpcNOPRONAF04to13for2004Analysis1000nocerolog10)",
           "scale(MeanSlopefor2004analysislog10)","scale(MeanElevationfor2004analysislog10)",
           "scale(PopDensity20042004Analysislog10)","scale(SizeKm22004Analysislog10)","scale(ZHPerCapTotal04to13for2004Analysis1000NOPRONAFlog10)",
           "Biomefor2004Analysis")

#For CBPS models
cbpsDepVar <- "PRONAFPerCapTotal04to13for2004Analysis1000log10"
cbpsbaselineDepVar <- "ChildUnderweightPerc2004for2004Analysis100"

cbpsVars <- c("MPI2004for2004Analysislog10",
              "GDPPerCapPublicrealN2004for2004Analysis1000log10","TotalKcal2004percapperdaynocerolog10",
              "TotalHectareCrops2004for2004Analysisnocerolog10","TotalHectarePasture2006for2004Analysisnocerolog10",
              "TotalHectareFarmsLess502006for2004Analysisnocerolog10","RemoteMinPopFifty2004Analysislog10",
              "ChangeCummulativeSPEI02and04to11and13for2004Analysis","TotRCpcNOPRONAF04to13for2004Analysis1000nocerolog10",
              "MeanSlopefor2004analysislog10","MeanElevationfor2004analysislog10",
              "PopDensity20042004Analysislog10","SizeKm22004Analysislog10","ZHPerCapTotal04to13for2004Analysis1000NOPRONAFlog10",
              "stateName","Biomefor2004Analysis")

#########################################################################################################################

#fin2_

#temp1
#temp2

################################################################################################################################

